Reading data “income per person” through GitHub

# Income-per-person table, pulled straight from the course repository.
data1 <- read.csv(
  file = "https://raw.githubusercontent.com/shakirovb/STA553/main/income_per_person.csv"
)

# Optional sanity checks (left disabled):
# head(data1)
# tail(data1)
# Confirmed: the data was read from GitHub successfully.

Reshaping data-1 based on given conditions

# Wide -> long: every column except `geo` becomes a (Year, Income) pair,
# and pairs with a missing Income are dropped.
reshdat <- gather(data1, key = "Year", value = "Income", -geo, na.rm = TRUE)

# Keep characters 2-5 of the former column name as `year` (presumably this
# skips the "X" that read.csv() prepends to numeric headers), then discard
# the raw `Year` label.
reshdat2 <- select(
  mutate(reshdat, year = substr(Year, start = 2, stop = 5)),
  -Year
)

# Final tidy table with `geo` exposed as `Country`.
dataready <- reshdat2 %>% rename(Country = geo)
head(dataready)
##               Country Income year
## 1         Afghanistan    603 1800
## 2             Albania    667 1800
## 3             Algeria    715 1800
## 4             Andorra   1200 1800
## 5              Angola    618 1800
## 6 Antigua and Barbuda    757 1800
tail(dataready)
##         Country Income year
## 42262   Vanuatu   2900 2018
## 42263 Venezuela  14200 2018
## 42264   Vietnam   6550 2018
## 42265     Yemen   2430 2018
## 42266    Zambia   3870 2018
## 42267  Zimbabwe   1950 2018

Reading data “life expectancy” through GitHub

# Life-expectancy table, pulled straight from the course repository.
data2 <- read.csv(
  file = "https://raw.githubusercontent.com/shakirovb/STA553/main/life_expectancy_years.csv"
)

# Optional sanity check (left disabled):
# head(data2)
# Confirmed: the data was read from GitHub successfully.

Reshaping data-2 based on given conditions

# Wide -> long: every column except `geo` becomes a (Year, Life Exp.) pair,
# and pairs with a missing life expectancy are dropped.
lifeexp <- gather(data2, key = "Year", value = "Life Exp.", -geo, na.rm = TRUE)

# Keep characters 2-5 of the former column name as `year` (presumably this
# skips the "X" that read.csv() prepends to numeric headers), then discard
# the raw `Year` label.
lifeexp2 <- select(
  mutate(lifeexp, year = substr(Year, start = 2, stop = 5)),
  -Year
)

# Final tidy table with `geo` exposed as `Country`.
dataready2 <- lifeexp2 %>% rename(Country = geo)
head(dataready2)
##               Country Life Exp. year
## 1         Afghanistan      28.2 1800
## 2             Albania      35.4 1800
## 3             Algeria      28.8 1800
## 5              Angola      27.0 1800
## 6 Antigua and Barbuda      33.5 1800
## 7           Argentina      33.2 1800
tail(dataready2)
##         Country Life Exp. year
## 40948   Vanuatu      64.3 2018
## 40949 Venezuela      75.9 2018
## 40950   Vietnam      74.9 2018
## 40951     Yemen      67.1 2018
## 40952    Zambia      59.5 2018
## 40953  Zimbabwe      60.2 2018

Reading data “population total” through GitHub and reshaping data-3 based on given conditions

# Total-population table, pulled straight from the course repository.
data3 <- read.csv(
  file = "https://raw.githubusercontent.com/shakirovb/STA553/main/population_total.csv"
)
# head(data3)

# Wide -> long: every column except `geo` becomes a (Year, Population_size)
# pair, and pairs with a missing population are dropped.
popdata <- gather(
  data3,
  key = "Year", value = "Population_size", -geo, na.rm = TRUE
)

# Keep characters 2-5 of the former column name as `year` (presumably this
# skips the "X" that read.csv() prepends to numeric headers), then discard
# the raw `Year` label.
popdata2 <- select(
  mutate(popdata, year = substr(Year, start = 2, stop = 5)),
  -Year
)

# Final tidy table with `geo` exposed as `Country`.
dataready3 <- popdata2 %>% rename(Country = geo)
head(dataready3)
##               Country Population_size year
## 1         Afghanistan         3280000 1800
## 2             Albania          410000 1800
## 3             Algeria         2500000 1800
## 4             Andorra            2650 1800
## 5              Angola         1570000 1800
## 6 Antigua and Barbuda           37000 1800

Merging three datasets by Country and Year

# Step 1: join income with life expectancy on the (Country, year) key.
# merge() keeps only keys present in both inputs by default.
datacombo <- merge(x = dataready, y = dataready2, by = c("Country", "year"))
# head(datacombo)

# Step 2: attach population size using the same key.
datacombo2 <- merge(x = datacombo, y = dataready3, by = c("Country", "year"))
head(datacombo2)
##       Country year Income Life Exp. Population_size
## 1 Afghanistan 1800    603      28.2         3280000
## 2 Afghanistan 1801    603      28.2         3280000
## 3 Afghanistan 1802    603      28.2         3280000
## 4 Afghanistan 1803    603      28.2         3280000
## 5 Afghanistan 1804    603      28.2         3280000
## 6 Afghanistan 1805    603      28.2         3280000

Output data frame restricted to the year 2015

# 2015 slice of the merged table.
# Rows: positions where `year` equals "2015" (`year` is a character column
#   produced by substr()).
# Columns: those whose name is in the listed set, in their existing order.
datacombo2015 <- datacombo2[
  which(datacombo2[["year"]] == "2015"),
  is.element(
    names(datacombo2),
    c("Country", "Income", "Life Exp.", "year", "Population_size")
  )
]
# head(datacombo2015)
# tail(datacombo2015)

RStudio-based scatter plot with "hover text" / pop-ups

# Apply the shared layout (size, title, legend, margins, axes, annotation)
# for the 2015 income vs life-expectancy scatter plot.
#
# Args:
#   p: the plot object to style; it is forwarded unchanged as the first
#      argument of layout(), so the helper can sit at the end of a pipe:
#      plot_ly(...) %>% myPlotlyLayout()
#
# Returns:
#   Whatever layout() returns for `p` with the settings below.
#
# Fixes relative to the earlier version:
#   * `with = 700` was a typo for `width = 700`, so the width was never set.
#   * layout() was called without a plot object, so the helper could not be
#     applied to any figure.
myPlotlyLayout <- function(p) {
  layout(
    p,
    ## graphic size
    ## NOTE(review): recent plotly releases reportedly prefer width/height
    ## to be given to plot_ly() instead of layout() - confirm and move if a
    ## deprecation warning appears.
    width = 700,
    height = 700,
    ### Title
    title = list(
      text = "Income vs Life Expectancy in 2015",
      font = list(
        family = "Times New Roman",  # HTML font family
        size = 18,
        color = "red"
      )
    ),
    ### legend
    legend = list(
      title = list(
        text = 'Country',
        font = list(
          family = "Courier New",
          size = 14,
          color = "green"
        )
      ),
      bgcolor = "ivory",
      bordercolor = "navy",
      groupclick = "togglegroup",  # one of "toggleitem" and "togglegroup"
      orientation = "v"            # vertical legend
    ),
    ## margin of the plot
    margin = list(
      b = 120,
      l = 50,
      t = 120,
      r = 50
    ),
    ## Background
    plot_bgcolor = '#f7f7f7',
    ## Axes labels
    xaxis = list(
      title = list(
        text = 'Life Expectancy',
        font = list(family = 'Arial')
      ),
      zerolinecolor = 'red',
      zerolinewidth = 2,
      gridcolor = 'white'
    ),
    yaxis = list(
      title = list(
        text = 'Income',
        font = list(family = 'Arial')
      ),
      zerolinecolor = 'purple',
      zerolinewidth = 2,
      gridcolor = 'white'
    ),
    ## annotations
    annotations = list(
      x = 0.7,   # between 0 and 1. 0 = left, 1 = right
      y = 0.9,   # between 0 and 1. 0 = bottom, 1 = top
      font = list(
        size = 12,
        color = "darkred"
      ),
      text = "The point size is proportional to Population",
      xref = "paper",      # "container" spans the entire width of the plot;
                           # "paper" refers to the plotting area only
      yref = "paper",      # same as xref
      xanchor = "center",  # horizontal alignment relative to its x position
      yanchor = "bottom",  # vertical counterpart of xanchor
      showarrow = FALSE
    )
  )
}
# Columns of the 2015 slice pulled out as plain vectors. The formulas passed
# to plot_ly() below refer to these names, which also determine the default
# axis labels ("Life_Expectancy", "Income").
Life_Expectancy <- datacombo2015$`Life Exp.`
Income <- datacombo2015$Income
country <- datacombo2015$Country
year <- datacombo2015$year
popsize <- datacombo2015$Population_size

# Interactive bubble chart: life expectancy (x) against income (y), one
# marker per country, marker size mapped to population.
plot_ly(
  data = datacombo2015,
  x = ~Life_Expectancy,      # horizontal axis
  y = ~Income,               # vertical axis
  color = ~factor(country),  # discrete colour mapping: one colour per country
  text = ~country,           # made available to the hover template as %{text}
  ## The hover template shows the country followed by the two numeric
  ## variables. Every <b>/<i> tag is closed (the earlier template left a <b>
  ## open), and the empty <extra></extra> hides the secondary hover box so the
  ## country name is not displayed twice.
  hovertemplate = paste0(
    "<b>%{text}</b>",
    "<br><i><b>Income</b></i>: %{y}",
    "<br><b>Life_Expectancy</b>: %{x}",
    "<extra></extra>"
  ),
  alpha = 0.9,
  size = ~popsize,
  type = "scatter",
  mode = "markers"
) %>%
  ## `title` is a layout attribute, not a scatter-trace attribute, so it is
  ## set through layout() rather than passed to plot_ly().
  layout(title = "Income vs Life Expectancy in 2015")

Saving merged datasets to create better Tableau visualization

# Export the merged country-year table (all years) as CSV for Tableau.
# The target used to be written as ".../Desktop/WCU\\datacombo3.csv". On a
# "/"-separated path the backslash is not a directory separator, so the file
# was named "WCU\datacombo3.csv" and landed in Desktop instead of in the WCU
# folder. file.path() joins the directory and file name with "/".
# The WCU directory must already exist.
write.csv(
  datacombo2,
  file.path("/Users/bobbyshakirov/Desktop/WCU", "datacombo3.csv"),
  row.names = TRUE  # keeps the row index as the first CSV column
)

#install.packages("writexl")
library("writexl")

#write_xlsx(datacombo2, "/Users/bobbyshakirov/Library/CloudStorage/OneDrive-WestChesterUniversityofPA/STA #553/ASSIGNEMNT\\combodataTableau")